bitkeeper revision 1.1236.1.164 (424aed36IaX4v3-NGAT_MnCdUsD1yQ)
author kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Wed, 30 Mar 2005 18:17:26 +0000 (18:17 +0000)
committer kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Wed, 30 Mar 2005 18:17:26 +0000 (18:17 +0000)
Split mmu_update() hypercall into mmu_update() and mmuext_op().
All MMUEXT_* ops are now done via the latter hypercall. This allows
more arguments to be passed in a nicer way to mmuext operations.
Linux 2.4, 2.6 and control tools all use the new interface. The BSDs
will need some work, but shouldn't be too hard (and they can be moved
to writable pagetables at the same time :-) ).
Signed-off-by: Keir Fraser <keir@xensource.com>
26 files changed:
freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c
freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c
linux-2.4.29-xen-sparse/arch/xen/kernel/traps.c
linux-2.4.29-xen-sparse/arch/xen/mm/ioremap.c
linux-2.4.29-xen-sparse/mm/memory.c
linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c
linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c
linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c
linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c
linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c
linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c
linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h
linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h
netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c
tools/libxc/xc_linux_build.c
tools/libxc/xc_linux_restore.c
tools/libxc/xc_plan9_build.c
tools/libxc/xc_private.c
tools/libxc/xc_private.h
tools/libxc/xc_vmx_build.c
xen/arch/x86/mm.c
xen/arch/x86/x86_32/entry.S
xen/arch/x86/x86_64/entry.S
xen/include/public/xen.h

index 874f87fdda73e32f2fe1c446dc0a90572fdbdecf..4fa020f531e7d55631eb56339bc9290440ab55c3 100644 (file)
@@ -540,7 +540,7 @@ mcl_queue_pt_update(vm_offset_t va, vm_paddr_t ma)
     MCL_QUEUE[MCL_IDX].op = __HYPERVISOR_update_va_mapping;
     MCL_QUEUE[MCL_IDX].args[0] = (unsigned long)va;
     MCL_QUEUE[MCL_IDX].args[1] = (unsigned long)ma;
-    MCL_QUEUE[MCL_IDX].args[2] = UVMF_INVLPG;
+    MCL_QUEUE[MCL_IDX].args[2] = UVMF_INVLPG_LOCAL;
     mcl_increment_idx();
 }
 
index 23e762e304482735ffbb2cff90c420c93494d9c9..40d9e4636ebd691e57bbcdfe9756376a7055ced5 100644 (file)
@@ -440,7 +440,7 @@ xn_alloc_rx_buffers(struct xn_softc *sc)
     PT_UPDATES_FLUSH();
 
     /* After all PTEs have been zapped we blow away stale TLB entries. */
-    xn_rx_mcl[i-1].args[2] = UVMF_FLUSH_TLB;
+    xn_rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH_LOCAL;
 
     /* Give away a batch of pages. */
     xn_rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
index ada06dd973f6bbd512e2cc0eb4818857f4d2cdda..dc9220dfe07644798e4512feefaf3d55747e97f8 100644 (file)
@@ -316,15 +316,7 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
                __asm__ __volatile__ ( "sldt %0" : "=r" (ldt) );
                if ( ldt == 0 )
                {
-                   mmu_update_t u;
-                   u.ptr  = MMU_EXTENDED_COMMAND;
-                   u.ptr |= (unsigned long)&default_ldt[0];
-                   u.val  = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
-                   if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL) < 0) )
-                   {
-                       show_trace(NULL);
-                       panic("Failed to install default LDT");
-                   }
+                    xen_set_ldt((unsigned long)&default_ldt[0], 5);
                    return;
                }
        }
index 34c95c84b55290edb2095f770ee1a472d22889d8..2f3db057d9169e72927be072e4ebf7f9c9a0058e 100644 (file)
@@ -113,12 +113,7 @@ int direct_remap_area_pages(struct mm_struct *mm,
     int i;
     unsigned long start_address;
 #define MAX_DIRECTMAP_MMU_QUEUE 130
-    mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
-
-    u[0].ptr  = MMU_EXTENDED_COMMAND;
-    u[0].val  = MMUEXT_SET_FOREIGNDOM;
-    u[0].val |= (unsigned long)domid << 16;
-    v = w = &u[1];
+    mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
 
     start_address = address;
 
@@ -130,11 +125,11 @@ int direct_remap_area_pages(struct mm_struct *mm,
            __direct_remap_area_pages( mm,
                                       start_address, 
                                       address-start_address, 
-                                      w);
+                                      u);
            
-           if ( HYPERVISOR_mmu_update(u, v - u, NULL) < 0 )
+           if ( HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0 )
                return -EFAULT;     
-           v = w;
+           v = u;
            start_address = address;
        }
 
@@ -149,14 +144,14 @@ int direct_remap_area_pages(struct mm_struct *mm,
         v++;
     }
 
-    if ( v != w )
+    if ( v != u )
     {
        /* get the ptep's filled in */
        __direct_remap_area_pages(mm,
                                   start_address, 
                                   address-start_address, 
-                                  w);   
-       if ( unlikely(HYPERVISOR_mmu_update(u, v - u, NULL) < 0) )
+                                  u);   
+       if ( unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) )
            return -EFAULT;         
     }
     
index 880b6981c4c87e4700794b22533e8db414eed7ed..883a2928ab095f4deb08c12baf6610e32eac8733 100644 (file)
@@ -911,7 +911,7 @@ static inline void establish_pte(struct vm_area_struct * vma, unsigned long addr
 {
 #ifdef CONFIG_XEN
        if ( likely(vma->vm_mm == current->mm) ) {
-               HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG);
+               HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG_LOCAL);
        } else {
                set_pte(page_table, entry);
                flush_tlb_page(vma, address);
index f56957f6e67ed1608c6eeba685692826c78218f3..a6615b7e18077f7ffed07a5011d84931acfde386 100644 (file)
@@ -465,14 +465,7 @@ fastcall void do_general_protection(struct pt_regs * regs, long error_code)
                unsigned long ldt;
                __asm__ __volatile__ ("sldt %0" : "=r" (ldt));
                if (ldt == 0) {
-                       mmu_update_t u;
-                       u.ptr = MMU_EXTENDED_COMMAND;
-                       u.ptr |= (unsigned long)&default_ldt[0];
-                       u.val = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
-                       if (unlikely(HYPERVISOR_mmu_update(&u, 1, NULL) < 0)) {
-                               show_trace(NULL, (unsigned long *)&u);
-                               panic("Failed to install default LDT");
-                       }
+                       xen_set_ldt((unsigned long)&default_ldt[0], 5);
                        return;
                }
        }
index 525576243bcd10ae65d2a6583dfb05cd3ccdab47..1ac796d9fdaee33ef5bab903815bd91a129a36c7 100644 (file)
@@ -52,7 +52,7 @@ void xen_l1_entry_update(pte_t *ptr, unsigned long val)
     mmu_update_t u;
     u.ptr = virt_to_machine(ptr);
     u.val = val;
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
+    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
@@ -60,79 +60,79 @@ void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
     mmu_update_t u;
     u.ptr = virt_to_machine(ptr);
     u.val = pmd_val_ma(val);
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
+    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
 }
 
-void xen_pt_switch(unsigned long ptr)
+void xen_machphys_update(unsigned long mfn, unsigned long pfn)
 {
     mmu_update_t u;
-    u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND;
-    u.val = MMUEXT_NEW_BASEPTR;
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
+    u.ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+    u.val = pfn;
+    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_pt_switch(unsigned long ptr)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_NEW_BASEPTR;
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_tlb_flush(void)
 {
-    mmu_update_t u;
-    u.ptr = MMU_EXTENDED_COMMAND;
-    u.val = MMUEXT_TLB_FLUSH;
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
+    struct mmuext_op op;
+    op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_invlpg(unsigned long ptr)
 {
-    mmu_update_t u;
-    u.ptr = (ptr & PAGE_MASK) | MMU_EXTENDED_COMMAND;
-    u.val = MMUEXT_INVLPG;
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
+    struct mmuext_op op;
+    op.cmd = MMUEXT_INVLPG_LOCAL;
+    op.linear_addr = ptr & PAGE_MASK;
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_pgd_pin(unsigned long ptr)
 {
-    mmu_update_t u;
-    u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND;
-    u.val = MMUEXT_PIN_L2_TABLE;
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
+    struct mmuext_op op;
+    op.cmd = MMUEXT_PIN_L2_TABLE;
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_pgd_unpin(unsigned long ptr)
 {
-    mmu_update_t u;
-    u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND;
-    u.val = MMUEXT_UNPIN_TABLE;
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
+    struct mmuext_op op;
+    op.cmd = MMUEXT_UNPIN_TABLE;
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_pte_pin(unsigned long ptr)
 {
-    mmu_update_t u;
-    u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND;
-    u.val = MMUEXT_PIN_L1_TABLE;
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
+    struct mmuext_op op;
+    op.cmd = MMUEXT_PIN_L1_TABLE;
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_pte_unpin(unsigned long ptr)
 {
-    mmu_update_t u;
-    u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND;
-    u.val = MMUEXT_UNPIN_TABLE;
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
+    struct mmuext_op op;
+    op.cmd = MMUEXT_UNPIN_TABLE;
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_set_ldt(unsigned long ptr, unsigned long len)
 {
-    mmu_update_t u;
-    u.ptr = ptr | MMU_EXTENDED_COMMAND;
-    u.val = (len << MMUEXT_CMD_SHIFT) | MMUEXT_SET_LDT;
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
-}
-
-void xen_machphys_update(unsigned long mfn, unsigned long pfn)
-{
-    mmu_update_t u;
-    u.ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
-    u.val = pfn;
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
+    struct mmuext_op op;
+    op.cmd = MMUEXT_SET_LDT;
+    op.linear_addr = ptr;
+    op.nr_ents = len;
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 #ifdef CONFIG_XEN_PHYSDEV_ACCESS
index 8a0df417edf2bd30ea59d962d067238d1bef62cc..7e58a230fc730013dbdb4d8a49c8b5181fe99ef9 100644 (file)
@@ -108,7 +108,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
                        if(!PageReserved(page))
                                return NULL;
 
-               domid = DOMID_LOCAL;
+               domid = DOMID_SELF;
        }
 
        /*
@@ -393,15 +393,7 @@ int direct_remap_area_pages(struct mm_struct *mm,
        int i;
        unsigned long start_address;
 #define MAX_DIRECTMAP_MMU_QUEUE 130
-       mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
-
-       v = w = &u[0];
-       if (domid != DOMID_LOCAL) {
-               u[0].ptr  = MMU_EXTENDED_COMMAND;
-               u[0].val  = MMUEXT_SET_FOREIGNDOM;
-               u[0].val |= (unsigned long)domid << 16;
-               v = w = &u[1];
-       }
+       mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
 
        start_address = address;
 
@@ -413,11 +405,11 @@ int direct_remap_area_pages(struct mm_struct *mm,
                        __direct_remap_area_pages(mm,
                                                  start_address, 
                                                  address-start_address, 
-                                                 w);
+                                                 u);
  
-                       if (HYPERVISOR_mmu_update(u, v - u, NULL) < 0)
+                       if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
                                return -EFAULT;
-                       v = w;
+                       v = u;
                        start_address = address;
                }
 
@@ -432,13 +424,13 @@ int direct_remap_area_pages(struct mm_struct *mm,
                v++;
        }
 
-       if (v != w) {
+       if (v != u) {
                /* get the ptep's filled in */
                __direct_remap_area_pages(mm,
                                          start_address, 
                                          address-start_address, 
-                                         w);
-               if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL) < 0))
+                                         u);
+               if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
                        return -EFAULT;
        }
 
index ab9fc3c977192116004bdf5664d0099063ad9304..21b9540f19f681a25c2e1e06b277701db138c71f 100644 (file)
@@ -100,7 +100,7 @@ static void fast_flush_area(int idx, int nr_pages)
         mcl[i].args[2] = 0;
     }
 
-    mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
+    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH_ALL;
     if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
         BUG();
 }
index 248711646b18d57217d69ef0fd19f90508fd57ef..9079ea2d47372d47392ff273a5726b914356360a 100644 (file)
@@ -38,8 +38,9 @@ static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
 static struct timer_list net_timer;
 
 static struct sk_buff_head rx_queue;
-static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2];
-static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE*3];
+static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
+static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
+static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
 static unsigned char rx_notify[NR_EVENT_CHANNELS];
 
 /* Don't currently gate addition of an interface to the tx scheduling list. */
@@ -195,8 +196,9 @@ static void net_rx_action(unsigned long unused)
     netif_t *netif;
     s8 status;
     u16 size, id, evtchn;
-    mmu_update_t *mmu;
     multicall_entry_t *mcl;
+    mmu_update_t *mmu;
+    struct mmuext_op *mmuext;
     unsigned long vdata, mdata, new_mfn;
     struct sk_buff_head rxq;
     struct sk_buff *skb;
@@ -207,6 +209,7 @@ static void net_rx_action(unsigned long unused)
 
     mcl = rx_mcl;
     mmu = rx_mmu;
+    mmuext = rx_mmuext;
     while ( (skb = skb_dequeue(&rx_queue)) != NULL )
     {
         netif   = netdev_priv(skb->dev);
@@ -229,25 +232,26 @@ static void net_rx_action(unsigned long unused)
          */
         phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;
         
-        mmu[0].ptr  = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
-        mmu[0].val  = __pa(vdata) >> PAGE_SHIFT;  
-        mmu[1].ptr  = MMU_EXTENDED_COMMAND;
-        mmu[1].val  = MMUEXT_SET_FOREIGNDOM;      
-        mmu[1].val |= (unsigned long)netif->domid << 16;
-        mmu[2].ptr  = (mdata & PAGE_MASK) | MMU_EXTENDED_COMMAND;
-        mmu[2].val  = MMUEXT_REASSIGN_PAGE;
+        mcl->op = __HYPERVISOR_update_va_mapping;
+        mcl->args[0] = vdata;
+        mcl->args[1] = (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL;
+        mcl->args[2] = 0;
+        mcl++;
 
-        mcl[0].op = __HYPERVISOR_update_va_mapping;
-        mcl[0].args[0] = vdata;
-        mcl[0].args[1] = (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL;
-        mcl[0].args[2] = 0;
-        mcl[1].op = __HYPERVISOR_mmu_update;
-        mcl[1].args[0] = (unsigned long)mmu;
-        mcl[1].args[1] = 3;
-        mcl[1].args[2] = 0;
+        mcl->op = __HYPERVISOR_mmuext_op;
+        mcl->args[0] = (unsigned long)mmuext;
+        mcl->args[1] = 1;
+        mcl->args[2] = 0;
+        mcl->args[3] = netif->domid;
+        mcl++;
 
-        mcl += 2;
-        mmu += 3;
+        mmuext->cmd = MMUEXT_REASSIGN_PAGE;
+        mmuext->mfn = mdata >> PAGE_SHIFT;
+        mmuext++;
+
+        mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+        mmu->val = __pa(vdata) >> PAGE_SHIFT;  
+        mmu++;
 
         __skb_queue_tail(&rxq, skb);
 
@@ -259,12 +263,19 @@ static void net_rx_action(unsigned long unused)
     if ( mcl == rx_mcl )
         return;
 
-    mcl[-2].args[2] = UVMF_FLUSH_TLB;
+    mcl->op = __HYPERVISOR_mmu_update;
+    mcl->args[0] = (unsigned long)rx_mmu;
+    mcl->args[1] = mmu - rx_mmu;
+    mcl->args[2] = 0;
+    mcl->args[3] = DOMID_SELF;
+    mcl++;
+
+    mcl[-3].args[2] = UVMF_TLB_FLUSH_ALL;
     if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
         BUG();
 
     mcl = rx_mcl;
-    mmu = rx_mmu;
+    mmuext = rx_mmuext;
     while ( (skb = __skb_dequeue(&rxq)) != NULL )
     {
         netif   = netdev_priv(skb->dev);
@@ -272,7 +283,7 @@ static void net_rx_action(unsigned long unused)
 
         /* Rederive the machine addresses. */
         new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
-        mdata   = ((mmu[2].ptr & PAGE_MASK) |
+        mdata   = ((mmuext[0].mfn << PAGE_SHIFT) |
                    ((unsigned long)skb->data & ~PAGE_MASK));
         
         atomic_set(&(skb_shinfo(skb)->dataref), 1);
@@ -308,7 +319,7 @@ static void net_rx_action(unsigned long unused)
         dev_kfree_skb(skb);
 
         mcl += 2;
-        mmu += 3;
+        mmuext += 1;
     }
 
     while ( notify_nr != 0 )
@@ -418,7 +429,7 @@ static void net_tx_action(unsigned long unused)
         mcl++;     
     }
 
-    mcl[-1].args[2] = UVMF_FLUSH_TLB;
+    mcl[-1].args[2] = UVMF_TLB_FLUSH_ALL;
     if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
         BUG();
 
index 91fe4e59754b3be91b53e10b166a3b5ec49fd9d0..3b27381b9f0b9fd68efd45942a8cf3ab0c7a4c28 100644 (file)
@@ -388,7 +388,7 @@ static void network_alloc_rx_buffers(struct net_device *dev)
     }
 
     /* After all PTEs have been zapped we blow away stale TLB entries. */
-    rx_mcl[i-1].args[2] = UVMF_FLUSH_TLB;
+    rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH_ALL;
 
     /* Give away a batch of pages. */
     rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
@@ -588,6 +588,7 @@ static int netif_poll(struct net_device *dev, int *pbudget)
         mcl->args[0] = (unsigned long)rx_mmu;
         mcl->args[1] = mmu - rx_mmu;
         mcl->args[2] = 0;
+        mcl->args[3] = DOMID_SELF;
         mcl++;
         (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
     }
index 219b218920d12b285411d936e8437bf432c2343e..017ed4a477a55d40cf4427c11f0018d2871034b6 100644 (file)
@@ -98,7 +98,6 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
                 struct vm_area_struct *vma = 
                     find_vma( current->mm, msg[j].va );
 
-
                 if ( !vma )
                     return -EINVAL;
 
@@ -123,8 +122,7 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
 
     case IOCTL_PRIVCMD_MMAPBATCH:
     {
-#define MAX_DIRECTMAP_MMU_QUEUE 130
-        mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
+        mmu_update_t u;
         privcmd_mmapbatch_t m;
         struct vm_area_struct *vma = NULL;
         unsigned long *p, addr;
@@ -145,39 +143,31 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
         if ( (m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end )
         { ret = -EFAULT; goto batch_err; }
 
-        u[0].ptr  = MMU_EXTENDED_COMMAND;
-        u[0].val  = MMUEXT_SET_FOREIGNDOM;
-        u[0].val |= (unsigned long)m.dom << 16;
-        v = w = &u[1];
-
         p = m.arr;
         addr = m.addr;
         for ( i = 0; i < m.num; i++, addr += PAGE_SIZE, p++ )
         {
-
             if ( get_user(mfn, p) )
                 return -EFAULT;
 
-            v->val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot);
+            u.val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot);
 
             __direct_remap_area_pages(vma->vm_mm,
                                       addr, 
                                       PAGE_SIZE, 
-                                      v);
-
-            if ( unlikely(HYPERVISOR_mmu_update(u, v - u + 1, NULL) < 0) )
-                put_user( 0xF0000000 | mfn, p );
+                                      &u);
 
-            v = w;
+            if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0) )
+                put_user(0xF0000000 | mfn, p);
         }
 
         ret = 0;
         break;
 
     batch_err:
-        printk(KERN_ALERT "XXX SMH: ERROR IN MMAPBATCH\n"); 
         printk("batch_err ret=%d vma=%p addr=%lx num=%d arr=%p %lx-%lx\n", 
-               ret, vma, m.addr, m.num, m.arr, vma->vm_start, vma->vm_end);
+               ret, vma, m.addr, m.num, m.arr,
+               vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
         break;
     }
     break;
index b039b4506b06853f732986748f0fcbb7da27dc3a..72a6be3a2f5b62dfddce82325fb76799fcb0889d 100644 (file)
@@ -195,7 +195,7 @@ static void fast_flush_area(int idx, int nr_pages)
         mcl[i].args[2] = 0;
     }
 
-    mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
+    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH_ALL;
     if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
         BUG();
 }
index 325cf468b5db275b89a92315b11efa1cc6e0c757..714a247de381c7f412a8db895e041c023d0813f4 100644 (file)
@@ -407,7 +407,7 @@ extern void noexec_setup(const char *str);
        do {                                                              \
                if (__dirty) {                                            \
                        if ( likely((__vma)->vm_mm == current->mm) ) {    \
-                           HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG); \
+                           HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG_LOCAL); \
                        } else {                                          \
                             xen_l1_entry_update((__ptep), (__entry).pte_low); \
                            flush_tlb_page((__vma), (__address));         \
@@ -455,7 +455,6 @@ void make_pages_writable(void *va, unsigned int nr);
 #define kern_addr_valid(addr)  (1)
 #endif /* !CONFIG_DISCONTIGMEM */
 
-#define DOMID_LOCAL (0xFFFFU)
 int direct_remap_area_pages(struct mm_struct *mm,
                             unsigned long address, 
                             unsigned long machine_addr,
index 5fe90fb87f445a7850c885362923ef5357fb5fc3..9bc3e4997bf822a8cc470ea40412b2b74e442a17 100644 (file)
@@ -126,16 +126,33 @@ HYPERVISOR_set_trap_table(
 
 static inline int
 HYPERVISOR_mmu_update(
-    mmu_update_t *req, int count, int *success_count)
+    mmu_update_t *req, int count, int *success_count, domid_t domid)
 {
     int ret;
-    unsigned long ign1, ign2, ign3;
+    unsigned long ign1, ign2, ign3, ign4;
 
     __asm__ __volatile__ (
         TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
+        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
        : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
-         "3" (success_count)
+        "3" (success_count), "4" (domid)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_mmuext_op(
+    struct mmuext_op *op, int count, int *success_count, domid_t domid)
+{
+    int ret;
+    unsigned long ign1, ign2, ign3, ign4;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+       : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count),
+        "3" (success_count), "4" (domid)
        : "memory" );
 
     return ret;
index e1063b1775c0f8e53c8a173a2743facbbd493877..687b21080bc1675f09bb6716b3f9c642ae98333b 100644 (file)
@@ -598,7 +598,7 @@ xennet_rx_push_buffer(struct xennet_softc *sc, int id)
        xpq_flush_queue();
 
        /* After all PTEs have been zapped we blow away stale TLB entries. */
-       rx_mcl[nr_pfns-1].args[2] = UVMF_FLUSH_TLB;
+       rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH_LOCAL;
 
        /* Give away a batch of pages. */
        rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
@@ -681,7 +681,7 @@ xen_network_handler(void *arg)
                mcl->op = __HYPERVISOR_update_va_mapping;
                mcl->args[0] = sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va;
                mcl->args[1] = (rx->addr & PG_FRAME) | PG_V|PG_KW;
-               mcl->args[2] = UVMF_FLUSH_TLB; // 0;
+               mcl->args[2] = UVMF_TLB_FLUSH_LOCAL; // 0;
                mcl++;
 
                xpmap_phys_to_machine_mapping
@@ -898,7 +898,7 @@ network_alloc_rx_buffers(struct xennet_softc *sc)
        xpq_flush_queue();
 
        /* After all PTEs have been zapped we blow away stale TLB entries. */
-       rx_mcl[nr_pfns-1].args[2] = UVMF_FLUSH_TLB;
+       rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH_LOCAL;
 
        /* Give away a batch of pages. */
        rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
index 5e3d474ab0d98564f89badd87fae0b378baeb62b..469861b786674aecad63d3b33d613140aa175e87 100644 (file)
@@ -254,8 +254,7 @@ static int setup_guest(int xc_handle,
      * Pin down l2tab addr as page dir page - causes hypervisor to provide
      * correct protection for the page
      */ 
-    if ( add_mmu_update(xc_handle, mmu,
-                        l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) )
+    if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom) )
         goto error_out;
 
     start_info = xc_map_foreign_range(
@@ -447,10 +446,16 @@ int xc_linux_build(int xc_handle,
     memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
 
     /* No callback handlers. */
+#if defined(__i386__)
     ctxt->event_callback_cs     = FLAT_KERNEL_CS;
     ctxt->event_callback_eip    = 0;
     ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
     ctxt->failsafe_callback_eip = 0;
+#elif defined(__x86_64__)
+    ctxt->event_callback_eip    = 0;
+    ctxt->failsafe_callback_eip = 0;
+    ctxt->syscall_callback_eip  = 0;
+#endif
 
     memset( &launch_op, 0, sizeof(launch_op) );
 
index 938f219ec76aa485d682691c3bc39e6d263d53bc..220890d3461a9cb6b5e989c7eecf5ce79592e9ef 100644 (file)
@@ -422,10 +422,8 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
     {
         if ( pfn_type[i] == (L1TAB|LPINTAB) )
         {
-            if ( add_mmu_update(xc_handle, mmu,
-                                (pfn_to_mfn_table[i]<<PAGE_SHIFT) | 
-                                MMU_EXTENDED_COMMAND,
-                                MMUEXT_PIN_L1_TABLE) ) {
+            if ( pin_table(xc_handle, MMUEXT_PIN_L1_TABLE,
+                           pfn_to_mfn_table[i], dom) ) {
                 printf("ERR pin L1 pfn=%lx mfn=%lx\n",
                        (unsigned long)i, pfn_to_mfn_table[i]);
                 goto out;
@@ -438,11 +436,8 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
     {
         if ( pfn_type[i] == (L2TAB|LPINTAB) )
         {
-            if ( add_mmu_update(xc_handle, mmu,
-                                (pfn_to_mfn_table[i]<<PAGE_SHIFT) | 
-                                MMU_EXTENDED_COMMAND,
-                                MMUEXT_PIN_L2_TABLE) )
-            {
+            if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
+                           pfn_to_mfn_table[i], dom) ) {
                 printf("ERR pin L2 pfn=%lx mfn=%lx\n",
                        (unsigned long)i, pfn_to_mfn_table[i]);
                 goto out;
@@ -623,10 +618,12 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
     }
     if ( (ctxt.kernel_ss & 3) == 0 )
         ctxt.kernel_ss = FLAT_KERNEL_DS;
+#if defined(__i386__)
     if ( (ctxt.event_callback_cs & 3) == 0 )
         ctxt.event_callback_cs = FLAT_KERNEL_CS;
     if ( (ctxt.failsafe_callback_cs & 3) == 0 )
         ctxt.failsafe_callback_cs = FLAT_KERNEL_CS;
+#endif
     if ( ((ctxt.ldt_base & (PAGE_SIZE - 1)) != 0) ||
          (ctxt.ldt_ents > 8192) ||
          (ctxt.ldt_base > HYPERVISOR_VIRT_START) ||
index c6778d44bc76ce4fc3b15ecaec69b1d83cec038b..3476136196551b86606776d029646769ecb254c2 100755 (executable)
@@ -314,8 +314,7 @@ setup_guest(int xc_handle,
         * Pin down l2tab addr as page dir page - causes hypervisor to provide
         * correct protection for the page
         */
-       if (add_mmu_update(xc_handle, mmu,
-                          l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE))
+       if (pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom))
                goto error_out;
 
        for (count = 0; count < tot_pages; count++) {
@@ -526,10 +525,16 @@ xc_plan9_build(int xc_handle,
        memset(ctxt->debugreg, 0, sizeof (ctxt->debugreg));
 
        /* No callback handlers. */
-       ctxt->event_callback_cs = FLAT_KERNEL_CS;
-       ctxt->event_callback_eip = 0;
-       ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
+#if defined(__i386__)
+       ctxt->event_callback_cs     = FLAT_KERNEL_CS;
+       ctxt->event_callback_eip    = 0;
+       ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
+       ctxt->failsafe_callback_eip = 0;
+#elif defined(__x86_64__)
+       ctxt->event_callback_eip    = 0;
        ctxt->failsafe_callback_eip = 0;
+       ctxt->syscall_callback_eip  = 0;
+#endif
 
        memset(&launch_op, 0, sizeof (launch_op));
 
index 37342ac8378d972f06e8feb832ae69a6b670f1f5..386a240178f364cb50d12c8b81ea7ce27be86a67 100644 (file)
@@ -92,24 +92,54 @@ unsigned int get_pfn_type(int xc_handle,
 
 /*******************/
 
-#define FIRST_MMU_UPDATE 1
+int pin_table(
+    int xc_handle, unsigned int type, unsigned long mfn, domid_t dom)
+{
+    int err = 0;
+    struct mmuext_op op;
+    privcmd_hypercall_t hypercall;
+
+    op.cmd = type;
+    op.mfn = mfn;
+
+    hypercall.op     = __HYPERVISOR_mmuext_op;
+    hypercall.arg[0] = (unsigned long)&op;
+    hypercall.arg[1] = 1;
+    hypercall.arg[2] = 0;
+    hypercall.arg[3] = dom;
+
+    if ( mlock(&op, sizeof(op)) != 0 )
+    {
+        PERROR("Could not lock mmuext_op");
+        err = 1;
+        goto out;
+    }
+
+    if ( do_xen_hypercall(xc_handle, &hypercall) < 0 )
+    {
+        ERROR("Failure when submitting mmu updates");
+        err = 1;
+    }
+
+    (void)munlock(&op, sizeof(op));
+
+ out:
+    return err;
+}
 
 static int flush_mmu_updates(int xc_handle, mmu_t *mmu)
 {
     int err = 0;
     privcmd_hypercall_t hypercall;
 
-    if ( mmu->idx == FIRST_MMU_UPDATE )
+    if ( mmu->idx == 0 )
         return 0;
 
-    mmu->updates[0].ptr  = MMU_EXTENDED_COMMAND;
-    mmu->updates[0].val  = MMUEXT_SET_FOREIGNDOM;
-    mmu->updates[0].val |= (unsigned long)mmu->subject << 16;
-
     hypercall.op     = __HYPERVISOR_mmu_update;
     hypercall.arg[0] = (unsigned long)mmu->updates;
     hypercall.arg[1] = (unsigned long)mmu->idx;
     hypercall.arg[2] = 0;
+    hypercall.arg[3] = mmu->subject;
 
     if ( mlock(mmu->updates, sizeof(mmu->updates)) != 0 )
     {
@@ -124,7 +154,7 @@ static int flush_mmu_updates(int xc_handle, mmu_t *mmu)
         err = 1;
     }
 
-    mmu->idx = FIRST_MMU_UPDATE;
+    mmu->idx = 0;
     
     (void)munlock(mmu->updates, sizeof(mmu->updates));
 
@@ -137,7 +167,7 @@ mmu_t *init_mmu_updates(int xc_handle, domid_t dom)
     mmu_t *mmu = malloc(sizeof(mmu_t));
     if ( mmu == NULL )
         return mmu;
-    mmu->idx     = FIRST_MMU_UPDATE;
+    mmu->idx     = 0;
     mmu->subject = dom;
     return mmu;
 }
index b3ad75375a2c6fc339f394a9ed85c799b29b55f9..78f8bbe194c5c528976bb0513718b6969e980a9e 100644 (file)
@@ -212,4 +212,7 @@ void xc_map_memcpy(unsigned long dst, char *src, unsigned long size,
                    int xch, u32 dom, unsigned long *parray,
                    unsigned long vstart);
 
+int pin_table(
+    int xc_handle, unsigned int type, unsigned long mfn, domid_t dom);
+
 #endif /* __XC_PRIVATE_H__ */
index b1937b76baa86a4bef52ac9e4f63c3066f0aa2c5..d1692e61f80e30f96c6cad6cb7c62e09b8ad9b07 100644 (file)
@@ -333,8 +333,7 @@ static int setup_guest(int xc_handle,
      * Pin down l2tab addr as page dir page - causes hypervisor to provide
      * correct protection for the page
      */ 
-    if ( add_mmu_update(xc_handle, mmu,
-                        l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) )
+    if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom) )
         goto error_out;
 
     if ((boot_paramsp = xc_map_foreign_range(
@@ -612,10 +611,16 @@ int xc_vmx_build(int xc_handle,
     memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
 
     /* No callback handlers. */
+#if defined(__i386__)
     ctxt->event_callback_cs     = FLAT_KERNEL_CS;
     ctxt->event_callback_eip    = 0;
     ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
     ctxt->failsafe_callback_eip = 0;
+#elif defined(__x86_64__)
+    ctxt->event_callback_eip    = 0;
+    ctxt->failsafe_callback_eip = 0;
+    ctxt->syscall_callback_eip  = 0;
+#endif
 
     memset( &launch_op, 0, sizeof(launch_op) );
 
index c76dd791bc32ed2e762712368f04515fead2043e..946ea05e03736379e96a548ed23bec6cdf87b899 100644 (file)
 #define MEM_LOG(_f, _a...) ((void)0)
 #endif
 
+/*
+ * Used by both do_mmuext_op() and do_mmu_update():
+ * We steal the m.s.b. of the @count parameter to indicate whether this
+ * invocation of the hypercall is resuming a previously preempted call.
+ */
+#define MMU_UPDATE_PREEMPTED          (~(~0U>>1))
+
 static int alloc_l2_table(struct pfn_info *page);
 static int alloc_l1_table(struct pfn_info *page);
 static int get_page_from_pagenr(unsigned long page_nr, struct domain *d);
@@ -128,7 +135,7 @@ static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t);
 static struct {
 #define DOP_FLUSH_TLB   (1<<0) /* Flush the TLB.                 */
 #define DOP_RELOAD_LDT  (1<<1) /* Reload the LDT shadow mapping. */
-    unsigned long  deferred_ops;
+    unsigned int   deferred_ops;
     /* If non-NULL, specifies a foreign subject domain for some operations. */
     struct domain *foreign;
 } __cacheline_aligned percpu_info[NR_CPUS];
@@ -199,12 +206,16 @@ void write_ptbase(struct exec_domain *ed)
     write_cr3(pagetable_val(ed->arch.monitor_table));
 }
 
-static void __invalidate_shadow_ldt(struct exec_domain *d)
+
+static inline void invalidate_shadow_ldt(struct exec_domain *d)
 {
     int i;
     unsigned long pfn;
     struct pfn_info *page;
     
+    if ( d->arch.shadow_ldt_mapcnt == 0 )
+        return;
+
     d->arch.shadow_ldt_mapcnt = 0;
 
     for ( i = 16; i < 32; i++ )
@@ -223,13 +234,6 @@ static void __invalidate_shadow_ldt(struct exec_domain *d)
 }
 
 
-static inline void invalidate_shadow_ldt(struct exec_domain *d)
-{
-    if ( d->arch.shadow_ldt_mapcnt != 0 )
-        __invalidate_shadow_ldt(d);
-}
-
-
 static int alloc_segdesc_page(struct pfn_info *page)
 {
     struct desc_struct *descs;
@@ -1251,401 +1255,409 @@ int new_guest_cr3(unsigned long pfn)
     return okay;
 }
 
-static int do_extended_command(unsigned long ptr, unsigned long val)
+static void process_deferred_ops(unsigned int cpu)
 {
-    int okay = 1, cpu = smp_processor_id();
-    unsigned int cmd = val & MMUEXT_CMD_MASK, type;
-    unsigned long pfn = ptr >> PAGE_SHIFT;
-    struct pfn_info *page = &frame_table[pfn];
-    struct exec_domain *ed = current;
-    struct domain *d = ed->domain, *e;
-    u32 x, y, _d, _nd;
-    domid_t domid;
-    grant_ref_t gntref;
-
-    switch ( cmd )
-    {
-    case MMUEXT_PIN_L1_TABLE:
-        /*
-         * We insist that, if you pin an L1 page, it's the first thing that
-         * you do to it. This is because we require the backptr to still be
-         * mutable. This assumption seems safe.
-         */
-        type = PGT_l1_page_table | PGT_va_mutable;
-
-    pin_page:
-        okay = get_page_and_type_from_pagenr(pfn, type, FOREIGNDOM);
-        if ( unlikely(!okay) )
-        {
-            MEM_LOG("Error while pinning pfn %p", pfn);
-            break;
-        }
-
-        if ( unlikely(test_and_set_bit(_PGT_pinned,
-                                       &page->u.inuse.type_info)) )
-        {
-            MEM_LOG("Pfn %p already pinned", pfn);
-            put_page_and_type(page);
-            okay = 0;
-            break;
-        }
-
-        break;
+    unsigned int deferred_ops;
 
-    case MMUEXT_PIN_L2_TABLE:
-        type = PGT_l2_page_table;
-        goto pin_page;
+    deferred_ops = percpu_info[cpu].deferred_ops;
+    percpu_info[cpu].deferred_ops = 0;
 
-#ifdef __x86_64__
-    case MMUEXT_PIN_L3_TABLE:
-        type = PGT_l3_page_table;
-        goto pin_page;
+    if ( deferred_ops & DOP_FLUSH_TLB )
+        local_flush_tlb();
+        
+    if ( deferred_ops & DOP_RELOAD_LDT )
+        (void)map_ldt_shadow_page(0);
 
-    case MMUEXT_PIN_L4_TABLE:
-        type = PGT_l4_page_table;
-        goto pin_page;
-#endif /* __x86_64__ */
+    if ( unlikely(percpu_info[cpu].foreign != NULL) )
+    {
+        put_domain(percpu_info[cpu].foreign);
+        percpu_info[cpu].foreign = NULL;
+    }
+}
 
-    case MMUEXT_UNPIN_TABLE:
-        if ( unlikely(!(okay = get_page_from_pagenr(pfn, FOREIGNDOM))) )
-        {
-            MEM_LOG("Page %p bad domain (dom=%p)",
-                    ptr, page_get_owner(page));
-        }
-        else if ( likely(test_and_clear_bit(_PGT_pinned, 
-                                            &page->u.inuse.type_info)) )
-        {
-            put_page_and_type(page);
-            put_page(page);
-        }
-        else
-        {
-            okay = 0;
-            put_page(page);
-            MEM_LOG("Pfn %p not pinned", pfn);
-        }
-        break;
+static int set_foreigndom(unsigned int cpu, domid_t domid)
+{
+    struct domain *e, *d = current->domain;
+    int okay = 1;
 
-    case MMUEXT_NEW_BASEPTR:
-        okay = new_guest_cr3(pfn);
-        break;
-        
-#ifdef __x86_64__
-    case MMUEXT_NEW_USER_BASEPTR:
-        okay = get_page_and_type_from_pagenr(pfn, PGT_root_page_table, d);
-        if ( unlikely(!okay) )
-        {
-            MEM_LOG("Error while installing new baseptr %p", pfn);
-        }
-        else
-        {
-            unsigned long old_pfn =
-                pagetable_val(ed->arch.guest_table_user) >> PAGE_SHIFT;
-            ed->arch.guest_table_user = mk_pagetable(pfn << PAGE_SHIFT);
-            if ( old_pfn != 0 )
-                put_page_and_type(&frame_table[old_pfn]);
-        }
-        break;
-#endif
-        
-    case MMUEXT_TLB_FLUSH:
-        percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
-        break;
+    if ( (e = percpu_info[cpu].foreign) != NULL )
+        put_domain(e);
+    percpu_info[cpu].foreign = NULL;
     
-    case MMUEXT_INVLPG:
-        __flush_tlb_one(ptr);
-        break;
-
-    case MMUEXT_FLUSH_CACHE:
-        if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
-        {
-            MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n");
-            okay = 0;
-        }
-        else
-        {
-            wbinvd();
-        }
-        break;
+    if ( domid == DOMID_SELF )
+        goto out;
 
-    case MMUEXT_SET_LDT:
+    if ( !IS_PRIV(d) )
     {
-        unsigned long ents = val >> MMUEXT_CMD_SHIFT;
-        if ( ((ptr & (PAGE_SIZE-1)) != 0) || 
-             (ents > 8192) ||
-             ((ptr+ents*LDT_ENTRY_SIZE) < ptr) ||
-             ((ptr+ents*LDT_ENTRY_SIZE) > PAGE_OFFSET) )
+        switch ( domid )
         {
+        case DOMID_IO:
+            get_knownalive_domain(dom_io);
+            percpu_info[cpu].foreign = dom_io;
+            break;
+        default:
+            MEM_LOG("Dom %u cannot set foreign dom\n", d->id);
             okay = 0;
-            MEM_LOG("Bad args to SET_LDT: ptr=%p, ents=%p", ptr, ents);
-        }
-        else if ( (ed->arch.ldt_ents != ents) || 
-                  (ed->arch.ldt_base != ptr) )
-        {
-            invalidate_shadow_ldt(ed);
-            ed->arch.ldt_base = ptr;
-            ed->arch.ldt_ents = ents;
-            load_LDT(ed);
-            percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT;
-            if ( ents != 0 )
-                percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT;
+            break;
         }
-        break;
     }
-
-    case MMUEXT_SET_FOREIGNDOM:
-        domid = (domid_t)(val >> 16);
-
-        if ( (e = percpu_info[cpu].foreign) != NULL )
-            put_domain(e);
-        percpu_info[cpu].foreign = NULL;
-
-        if ( !IS_PRIV(d) )
+    else
+    {
+        percpu_info[cpu].foreign = e = find_domain_by_id(domid);
+        if ( e == NULL )
         {
             switch ( domid )
             {
+            case DOMID_XEN:
+                get_knownalive_domain(dom_xen);
+                percpu_info[cpu].foreign = dom_xen;
+                break;
             case DOMID_IO:
                 get_knownalive_domain(dom_io);
                 percpu_info[cpu].foreign = dom_io;
                 break;
             default:
-                MEM_LOG("Dom %u cannot set foreign dom\n", d->id);
+                MEM_LOG("Unknown domain '%u'", domid);
                 okay = 0;
                 break;
             }
         }
-        else
+    }
+
+ out:
+    return okay;
+}
+
+int do_mmuext_op(
+    struct mmuext_op *uops,
+    unsigned int count,
+    unsigned int *pdone,
+    unsigned int foreigndom)
+{
+    struct mmuext_op op;
+    int rc = 0, i = 0, okay, cpu = smp_processor_id();
+    unsigned int type, done = 0;
+    struct pfn_info *page;
+    struct exec_domain *ed = current;
+    struct domain *d = ed->domain, *e;
+    u32 x, y, _d, _nd;
+
+    LOCK_BIGLOCK(d);
+
+    cleanup_writable_pagetable(d);
+
+    if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
+    {
+        count &= ~MMU_UPDATE_PREEMPTED;
+        if ( unlikely(pdone != NULL) )
+            (void)get_user(done, pdone);
+    }
+
+    if ( !set_foreigndom(cpu, foreigndom) )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    if ( unlikely(!array_access_ok(VERIFY_READ, uops, count, sizeof(op))) )
+    {
+        rc = -EFAULT;
+        goto out;
+    }
+
+    for ( i = 0; i < count; i++ )
+    {
+        if ( hypercall_preempt_check() )
         {
-            percpu_info[cpu].foreign = e = find_domain_by_id(domid);
-            if ( e == NULL )
-            {
-                switch ( domid )
-                {
-                case DOMID_XEN:
-                    get_knownalive_domain(dom_xen);
-                    percpu_info[cpu].foreign = dom_xen;
-                    break;
-                case DOMID_IO:
-                    get_knownalive_domain(dom_io);
-                    percpu_info[cpu].foreign = dom_io;
-                    break;
-                default:
-                    MEM_LOG("Unknown domain '%u'", domid);
-                    okay = 0;
-                    break;
-                }
-            }
+            rc = hypercall4_create_continuation(
+                __HYPERVISOR_mmuext_op, uops,
+                (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+            break;
         }
-        break;
 
-    case MMUEXT_TRANSFER_PAGE:
-        domid  = (domid_t)(val >> 16);
-        gntref = (grant_ref_t)((val & 0xFF00) | ((ptr >> 2) & 0x00FF));
-        
-        if ( unlikely(IS_XEN_HEAP_FRAME(page)) ||
-             unlikely(!pfn_is_ram(pfn)) ||
-             unlikely((e = find_domain_by_id(domid)) == NULL) )
+        if ( unlikely(__copy_from_user(&op, uops, sizeof(op)) != 0) )
         {
-            MEM_LOG("Bad frame (%p) or bad domid (%d).\n", pfn, domid);
-            okay = 0;
+            MEM_LOG("Bad __copy_from_user");
+            rc = -EFAULT;
             break;
         }
 
-        spin_lock(&d->page_alloc_lock);
+        okay = 1;
+        page = &frame_table[op.mfn];
 
-        /*
-         * The tricky bit: atomically release ownership while there is just one
-         * benign reference to the page (PGC_allocated). If that reference
-         * disappears then the deallocation routine will safely spin.
-         */
-        _d  = pickle_domptr(d);
-        _nd = page->u.inuse._domain;
-        y   = page->count_info;
-        do {
-            x = y;
-            if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != 
-                          (1|PGC_allocated)) ||
-                 unlikely(_nd != _d) )
+        switch ( op.cmd )
+        {
+        case MMUEXT_PIN_L1_TABLE:
+            /*
+             * We insist that, if you pin an L1 page, it's the first thing that
+             * you do to it. This is because we require the backptr to still be
+             * mutable. This assumption seems safe.
+             */
+            type = PGT_l1_page_table | PGT_va_mutable;
+
+        pin_page:
+            okay = get_page_and_type_from_pagenr(op.mfn, type, FOREIGNDOM);
+            if ( unlikely(!okay) )
             {
-                MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p,"
-                        " caf=%08x, taf=%08x\n", page_to_pfn(page),
-                        d, d->id, unpickle_domptr(_nd), x, 
-                        page->u.inuse.type_info);
-                spin_unlock(&d->page_alloc_lock);
-                put_domain(e);
-                return 0;
+                MEM_LOG("Error while pinning MFN %p", op.mfn);
+                break;
             }
-            __asm__ __volatile__(
-                LOCK_PREFIX "cmpxchg8b %2"
-                : "=d" (_nd), "=a" (y),
-                "=m" (*(volatile u64 *)(&page->count_info))
-                : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
-        } 
-        while ( unlikely(_nd != _d) || unlikely(y != x) );
+            
+            if ( unlikely(test_and_set_bit(_PGT_pinned,
+                                           &page->u.inuse.type_info)) )
+            {
+                MEM_LOG("MFN %p already pinned", op.mfn);
+                put_page_and_type(page);
+                okay = 0;
+                break;
+            }
+            
+            break;
 
-        /*
-         * Unlink from 'd'. At least one reference remains (now anonymous), so
-         * noone else is spinning to try to delete this page from 'd'.
-         */
-        d->tot_pages--;
-        list_del(&page->list);
-        
-        spin_unlock(&d->page_alloc_lock);
+        case MMUEXT_PIN_L2_TABLE:
+            type = PGT_l2_page_table;
+            goto pin_page;
 
-        spin_lock(&e->page_alloc_lock);
+#ifdef __x86_64__
+        case MMUEXT_PIN_L3_TABLE:
+            type = PGT_l3_page_table;
+            goto pin_page;
 
-        /*
-         * Check that 'e' will accept the page and has reservation headroom.
-         * Also, a domain mustn't have PGC_allocated pages when it is dying.
-         */
-        ASSERT(e->tot_pages <= e->max_pages);
-        if ( unlikely(test_bit(DF_DYING, &e->d_flags)) ||
-             unlikely(e->tot_pages == e->max_pages) ||
-             unlikely(!gnttab_prepare_for_transfer(e, d, gntref)) )
-        {
-            MEM_LOG("Transferee has no reservation headroom (%d,%d), or "
-                    "provided a bad grant ref, or is dying (%p).\n",
-                    e->tot_pages, e->max_pages, e->d_flags);
-            spin_unlock(&e->page_alloc_lock);
-            put_domain(e);
-            okay = 0;
+        case MMUEXT_PIN_L4_TABLE:
+            type = PGT_l4_page_table;
+            goto pin_page;
+#endif /* __x86_64__ */
+
+        case MMUEXT_UNPIN_TABLE:
+            if ( unlikely(!(okay = get_page_from_pagenr(op.mfn, FOREIGNDOM))) )
+            {
+                MEM_LOG("MFN %p bad domain (dom=%p)",
+                        op.mfn, page_get_owner(page));
+            }
+            else if ( likely(test_and_clear_bit(_PGT_pinned, 
+                                                &page->u.inuse.type_info)) )
+            {
+                put_page_and_type(page);
+                put_page(page);
+            }
+            else
+            {
+                okay = 0;
+                put_page(page);
+                MEM_LOG("MFN %p not pinned", op.mfn);
+            }
             break;
-        }
 
-        /* Okay, add the page to 'e'. */
-        if ( unlikely(e->tot_pages++ == 0) )
-            get_knownalive_domain(e);
-        list_add_tail(&page->list, &e->page_list);
-        page_set_owner(page, e);
+        case MMUEXT_NEW_BASEPTR:
+            okay = new_guest_cr3(op.mfn);
+            break;
+        
+#ifdef __x86_64__
+        case MMUEXT_NEW_USER_BASEPTR:
+            okay = get_page_and_type_from_pagenr(
+                op.mfn, PGT_root_page_table, d);
+            if ( unlikely(!okay) )
+            {
+                MEM_LOG("Error while installing new MFN %p", op.mfn);
+            }
+            else
+            {
+                unsigned long old_mfn =
+                    pagetable_val(ed->arch.guest_table_user) >> PAGE_SHIFT;
+                ed->arch.guest_table_user = mk_pagetable(op.mfn << PAGE_SHIFT);
+                if ( old_mfn != 0 )
+                    put_page_and_type(&frame_table[old_mfn]);
+            }
+            break;
+#endif
+        
+        case MMUEXT_TLB_FLUSH_LOCAL:
+            percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
+            break;
+    
+        case MMUEXT_INVLPG_LOCAL:
+            __flush_tlb_one(op.linear_addr);
+            break;
 
-        spin_unlock(&e->page_alloc_lock);
+        case MMUEXT_TLB_FLUSH_MULTI:
+            flush_tlb_mask(d->cpuset); /* XXX KAF XXX */
+            break;
+    
+        case MMUEXT_INVLPG_MULTI:
+            flush_tlb_mask(d->cpuset); /* XXX KAF XXX */
+            break;
 
-        /* Transfer is all done: tell the guest about its new page frame. */
-        gnttab_notify_transfer(e, gntref, pfn);
-        
-        put_domain(e);
-        break;
+        case MMUEXT_TLB_FLUSH_ALL:
+            flush_tlb_mask(d->cpuset);
+            break;
+    
+        case MMUEXT_INVLPG_ALL:
+            flush_tlb_mask(d->cpuset); /* XXX KAF XXX */
+            break;
 
-    case MMUEXT_REASSIGN_PAGE:
-        if ( unlikely(!IS_PRIV(d)) )
-        {
-            MEM_LOG("Dom %u has no reassignment priv", d->id);
-            okay = 0;
+        case MMUEXT_FLUSH_CACHE:
+            if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
+            {
+                MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n");
+                okay = 0;
+            }
+            else
+            {
+                wbinvd();
+            }
             break;
-        }
 
-        e = percpu_info[cpu].foreign;
-        if ( unlikely(e == NULL) )
+        case MMUEXT_SET_LDT:
         {
-            MEM_LOG("No FOREIGNDOM to reassign pfn %p to", pfn);
-            okay = 0;
+            unsigned long ptr  = op.linear_addr;
+            unsigned long ents = op.nr_ents;
+            if ( ((ptr & (PAGE_SIZE-1)) != 0) || 
+                 (ents > 8192) ||
+                 ((ptr+ents*LDT_ENTRY_SIZE) < ptr) ||
+                 ((ptr+ents*LDT_ENTRY_SIZE) > PAGE_OFFSET) )
+            {
+                okay = 0;
+                MEM_LOG("Bad args to SET_LDT: ptr=%p, ents=%p", ptr, ents);
+            }
+            else if ( (ed->arch.ldt_ents != ents) || 
+                      (ed->arch.ldt_base != ptr) )
+            {
+                invalidate_shadow_ldt(ed);
+                ed->arch.ldt_base = ptr;
+                ed->arch.ldt_ents = ents;
+                load_LDT(ed);
+                percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT;
+                if ( ents != 0 )
+                    percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT;
+            }
             break;
         }
 
-        /*
-         * Grab both page_list locks, in order. This prevents the page from
-         * disappearing elsewhere while we modify the owner, and we'll need
-         * both locks if we're successful so that we can change lists.
-         */
-        if ( d < e )
-        {
-            spin_lock(&d->page_alloc_lock);
-            spin_lock(&e->page_alloc_lock);
-        }
-        else
-        {
-            spin_lock(&e->page_alloc_lock);
-            spin_lock(&d->page_alloc_lock);
-        }
-
-        /* A domain shouldn't have PGC_allocated pages when it is dying. */
-        if ( unlikely(test_bit(DF_DYING, &e->d_flags)) ||
-             unlikely(IS_XEN_HEAP_FRAME(page)) )
-        {
-            MEM_LOG("Reassignment page is Xen heap, or dest dom is dying.");
-            okay = 0;
-            goto reassign_fail;
-        }
-
-        /*
-         * The tricky bit: atomically change owner while there is just one
-         * benign reference to the page (PGC_allocated). If that reference
-         * disappears then the deallocation routine will safely spin.
-         */
-        _d  = pickle_domptr(d);
-        _nd = page->u.inuse._domain;
-        y   = page->count_info;
-        do {
-            x = y;
-            if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != 
-                          (1|PGC_allocated)) ||
-                 unlikely(_nd != _d) )
+        case MMUEXT_REASSIGN_PAGE:
+            if ( unlikely(!IS_PRIV(d)) )
             {
-                MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p,"
-                        " caf=%08x, taf=%08x\n", page_to_pfn(page),
-                        d, d->id, unpickle_domptr(_nd), x,
-                        page->u.inuse.type_info);
+                MEM_LOG("Dom %u has no reassignment priv", d->id);
+                okay = 0;
+                break;
+            }
+            
+            e = percpu_info[cpu].foreign;
+            if ( unlikely(e == NULL) )
+            {
+                MEM_LOG("No FOREIGNDOM to reassign MFN %p to", op.mfn);
+                okay = 0;
+                break;
+            }
+            
+            /*
+             * Grab both page_list locks, in order. This prevents the page from
+             * disappearing elsewhere while we modify the owner, and we'll need
+             * both locks if we're successful so that we can change lists.
+             */
+            if ( d < e )
+            {
+                spin_lock(&d->page_alloc_lock);
+                spin_lock(&e->page_alloc_lock);
+            }
+            else
+            {
+                spin_lock(&e->page_alloc_lock);
+                spin_lock(&d->page_alloc_lock);
+            }
+            
+            /* A domain shouldn't have PGC_allocated pages when it is dying. */
+            if ( unlikely(test_bit(DF_DYING, &e->d_flags)) ||
+                 unlikely(IS_XEN_HEAP_FRAME(page)) )
+            {
+                MEM_LOG("Reassign page is Xen heap, or dest dom is dying.");
                 okay = 0;
                 goto reassign_fail;
             }
-            __asm__ __volatile__(
-                LOCK_PREFIX "cmpxchg8b %3"
-                : "=d" (_nd), "=a" (y), "=c" (e),
-                "=m" (*(volatile u64 *)(&page->count_info))
-                : "0" (_d), "1" (x), "c" (e), "b" (x) );
-        } 
-        while ( unlikely(_nd != _d) || unlikely(y != x) );
-        
-        /*
-         * Unlink from 'd'. We transferred at least one reference to 'e', so
-         * noone else is spinning to try to delete this page from 'd'.
-         */
-        d->tot_pages--;
-        list_del(&page->list);
-        
-        /*
-         * Add the page to 'e'. Someone may already have removed the last
-         * reference and want to remove the page from 'e'. However, we have
-         * the lock so they'll spin waiting for us.
-         */
-        if ( unlikely(e->tot_pages++ == 0) )
-            get_knownalive_domain(e);
-        list_add_tail(&page->list, &e->page_list);
 
-    reassign_fail:        
-        spin_unlock(&d->page_alloc_lock);
-        spin_unlock(&e->page_alloc_lock);
-        break;
+            /*
+             * The tricky bit: atomically change owner while there is just one
+             * benign reference to the page (PGC_allocated). If that reference
+             * disappears then the deallocation routine will safely spin.
+             */
+            _d  = pickle_domptr(d);
+            _nd = page->u.inuse._domain;
+            y   = page->count_info;
+            do {
+                x = y;
+                if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != 
+                              (1|PGC_allocated)) ||
+                     unlikely(_nd != _d) )
+                {
+                    MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p,"
+                            " caf=%08x, taf=%08x\n", page_to_pfn(page),
+                            d, d->id, unpickle_domptr(_nd), x,
+                            page->u.inuse.type_info);
+                    okay = 0;
+                    goto reassign_fail;
+                }
+                __asm__ __volatile__(
+                    LOCK_PREFIX "cmpxchg8b %3"
+                    : "=d" (_nd), "=a" (y), "=c" (e),
+                    "=m" (*(volatile u64 *)(&page->count_info))
+                    : "0" (_d), "1" (x), "c" (e), "b" (x) );
+            } 
+            while ( unlikely(_nd != _d) || unlikely(y != x) );
+            
+            /*
+             * Unlink from 'd'. We transferred at least one reference to 'e',
+             * so no one else is spinning to try to delete this page from 'd'.
+             */
+            d->tot_pages--;
+            list_del(&page->list);
+            
+            /*
+             * Add the page to 'e'. Someone may already have removed the last
+             * reference and want to remove the page from 'e'. However, we have
+             * the lock so they'll spin waiting for us.
+             */
+            if ( unlikely(e->tot_pages++ == 0) )
+                get_knownalive_domain(e);
+            list_add_tail(&page->list, &e->page_list);
+            
+        reassign_fail:        
+            spin_unlock(&d->page_alloc_lock);
+            spin_unlock(&e->page_alloc_lock);
+            break;
+            
+        default:
+            MEM_LOG("Invalid extended pt command 0x%p", op.cmd);
+            okay = 0;
+            break;
+        }
 
-    case MMUEXT_CLEAR_FOREIGNDOM:
-        if ( (e = percpu_info[cpu].foreign) != NULL )
-            put_domain(e);
-        percpu_info[cpu].foreign = NULL;
-        break;
+        if ( unlikely(!okay) )
+        {
+            rc = -EINVAL;
+            break;
+        }
 
-    default:
-        MEM_LOG("Invalid extended pt command 0x%p", val & MMUEXT_CMD_MASK);
-        okay = 0;
-        break;
+        uops++;
     }
 
-    return okay;
+ out:
+    process_deferred_ops(cpu);
+
+    /* Add incremental work we have done to the @done output parameter. */
+    if ( unlikely(pdone != NULL) )
+        __put_user(done + i, pdone);
+
+    UNLOCK_BIGLOCK(d);
+    return rc;
 }
 
 int do_mmu_update(
-    mmu_update_t *ureqs, unsigned int count, unsigned int *pdone)
+    mmu_update_t *ureqs,
+    unsigned int count,
+    unsigned int *pdone,
+    unsigned int foreigndom)
 {
-/*
- * We steal the m.s.b. of the @count parameter to indicate whether this
- * invocation of do_mmu_update() is resuming a previously preempted call.
- * We steal the next 15 bits to remember the current FOREIGNDOM.
- */
-#define MMU_UPDATE_PREEMPTED          (~(~0U>>1))
-#define MMU_UPDATE_PREEMPT_FDOM_SHIFT ((sizeof(int)*8)-16)
-#define MMU_UPDATE_PREEMPT_FDOM_MASK  (0x7FFFU<<MMU_UPDATE_PREEMPT_FDOM_SHIFT)
-
     mmu_update_t req;
-    unsigned long va = 0, deferred_ops, pfn, prev_pfn = 0;
+    unsigned long va = 0, pfn, prev_pfn = 0;
     struct pfn_info *page;
     int rc = 0, okay = 1, i = 0, cpu = smp_processor_id();
     unsigned int cmd, done = 0;
@@ -1654,7 +1666,6 @@ int do_mmu_update(
     struct exec_domain *ed = current;
     struct domain *d = ed->domain;
     u32 type_info;
-    domid_t domid;
 
     LOCK_BIGLOCK(d);
 
@@ -1666,31 +1677,17 @@ int do_mmu_update(
     if ( unlikely(shadow_mode_translate(d)) )
         domain_crash_synchronous();
 
-    /*
-     * If we are resuming after preemption, read how much work we have already
-     * done. This allows us to set the @done output parameter correctly.
-     * We also reset FOREIGNDOM here.
-     */
-    if ( unlikely(count&(MMU_UPDATE_PREEMPTED|MMU_UPDATE_PREEMPT_FDOM_MASK)) )
+    if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
     {
-        if ( !(count & MMU_UPDATE_PREEMPTED) )
-        {
-            /* Count overflow into private FOREIGNDOM field. */
-            MEM_LOG("do_mmu_update count is too large");
-            rc = -EINVAL;
-            goto out;
-        }
         count &= ~MMU_UPDATE_PREEMPTED;
-        domid = count >> MMU_UPDATE_PREEMPT_FDOM_SHIFT;
-        count &= ~MMU_UPDATE_PREEMPT_FDOM_MASK;
         if ( unlikely(pdone != NULL) )
             (void)get_user(done, pdone);
-        if ( (domid != current->domain->id) &&
-             !do_extended_command(0, MMUEXT_SET_FOREIGNDOM | (domid << 16)) )
-        {
-            rc = -EINVAL;
-            goto out;
-        }
+    }
+
+    if ( !set_foreigndom(cpu, foreigndom) )
+    {
+        rc = -EINVAL;
+        goto out;
     }
 
     perfc_incrc(calls_to_mmu_update); 
@@ -1707,11 +1704,9 @@ int do_mmu_update(
     {
         if ( hypercall_preempt_check() )
         {
-            rc = hypercall3_create_continuation(
+            rc = hypercall4_create_continuation(
                 __HYPERVISOR_mmu_update, ureqs, 
-                (count - i) |
-                (FOREIGNDOM->id << MMU_UPDATE_PREEMPT_FDOM_SHIFT) | 
-                MMU_UPDATE_PREEMPTED, pdone);
+                (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
             break;
         }
 
@@ -1863,15 +1858,6 @@ int do_mmu_update(
             put_page(&frame_table[pfn]);
             break;
 
-            /*
-             * MMU_EXTENDED_COMMAND: Extended command is specified
-             * in the least-siginificant bits of the 'value' field.
-             */
-        case MMU_EXTENDED_COMMAND:
-            req.ptr &= ~(sizeof(l1_pgentry_t) - 1);
-            okay = do_extended_command(req.ptr, req.val);
-            break;
-
         default:
             MEM_LOG("Invalid page update command %p", req.ptr);
             break;
@@ -1893,20 +1879,7 @@ int do_mmu_update(
     if ( unlikely(prev_spl1e != 0) ) 
         unmap_domain_mem((void *)prev_spl1e);
 
-    deferred_ops = percpu_info[cpu].deferred_ops;
-    percpu_info[cpu].deferred_ops = 0;
-
-    if ( deferred_ops & DOP_FLUSH_TLB )
-        local_flush_tlb();
-        
-    if ( deferred_ops & DOP_RELOAD_LDT )
-        (void)map_ldt_shadow_page(0);
-
-    if ( unlikely(percpu_info[cpu].foreign != NULL) )
-    {
-        put_domain(percpu_info[cpu].foreign);
-        percpu_info[cpu].foreign = NULL;
-    }
+    process_deferred_ops(cpu);
 
     /* Add incremental work we have done to the @done output parameter. */
     if ( unlikely(pdone != NULL) )
@@ -2016,11 +1989,10 @@ int do_update_va_mapping(unsigned long va,
                          unsigned long val, 
                          unsigned long flags)
 {
-    struct exec_domain      *ed  = current;
-    struct domain           *d   = ed->domain;
-    unsigned int             cpu = ed->processor;
-    unsigned long            deferred_ops;
-    int                      rc = 0;
+    struct exec_domain *ed  = current;
+    struct domain      *d   = ed->domain;
+    unsigned int        cpu = ed->processor;
+    int                 rc = 0;
 
     perfc_incrc(calls_to_update_va);
 
@@ -2046,17 +2018,25 @@ int do_update_va_mapping(unsigned long va,
     if ( unlikely(shadow_mode_enabled(d)) )
         update_shadow_va_mapping(va, val, ed, d);
 
-    deferred_ops = percpu_info[cpu].deferred_ops;
-    percpu_info[cpu].deferred_ops = 0;
-
-    if ( unlikely(deferred_ops & DOP_FLUSH_TLB) || 
-         unlikely(flags & UVMF_FLUSH_TLB) )
+    switch ( flags & UVMF_FLUSH_MASK )
+    {
+    case UVMF_TLB_FLUSH_LOCAL:
         local_flush_tlb();
-    else if ( unlikely(flags & UVMF_INVLPG) )
+        percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
+        break;
+    case UVMF_TLB_FLUSH_ALL:
+        flush_tlb_mask(d->cpuset);
+        percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
+        break;
+    case UVMF_INVLPG_LOCAL:
         __flush_tlb_one(va);
+        break;
+    case UVMF_INVLPG_ALL:
+        flush_tlb_mask(d->cpuset); /* XXX KAF XXX */
+        break;
+    }
 
-    if ( unlikely(deferred_ops & DOP_RELOAD_LDT) )
-        (void)map_ldt_shadow_page(0);
+    process_deferred_ops(cpu);
     
     UNLOCK_BIGLOCK(d);
 
@@ -2084,9 +2064,6 @@ int do_update_va_mapping_otherdomain(unsigned long va,
 
     rc = do_update_va_mapping(va, val, flags);
 
-    put_domain(d);
-    percpu_info[cpu].foreign = NULL;
-
     return rc;
 }
 
@@ -3176,6 +3153,97 @@ void audit_domains_key(unsigned char key)
 
 #endif /* NDEBUG */
 
+/* Graveyard: stuff below may be useful in future. */
+#if 0
+    case MMUEXT_TRANSFER_PAGE:
+        domid  = (domid_t)(val >> 16);
+        gntref = (grant_ref_t)((val & 0xFF00) | ((ptr >> 2) & 0x00FF));
+        
+        if ( unlikely(IS_XEN_HEAP_FRAME(page)) ||
+             unlikely(!pfn_is_ram(pfn)) ||
+             unlikely((e = find_domain_by_id(domid)) == NULL) )
+        {
+            MEM_LOG("Bad frame (%p) or bad domid (%d).\n", pfn, domid);
+            okay = 0;
+            break;
+        }
+
+        spin_lock(&d->page_alloc_lock);
+
+        /*
+         * The tricky bit: atomically release ownership while there is just one
+         * benign reference to the page (PGC_allocated). If that reference
+         * disappears then the deallocation routine will safely spin.
+         */
+        _d  = pickle_domptr(d);
+        _nd = page->u.inuse._domain;
+        y   = page->count_info;
+        do {
+            x = y;
+            if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != 
+                          (1|PGC_allocated)) ||
+                 unlikely(_nd != _d) )
+            {
+                MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p,"
+                        " caf=%08x, taf=%08x\n", page_to_pfn(page),
+                        d, d->id, unpickle_domptr(_nd), x, 
+                        page->u.inuse.type_info);
+                spin_unlock(&d->page_alloc_lock);
+                put_domain(e);
+                return 0;
+            }
+            __asm__ __volatile__(
+                LOCK_PREFIX "cmpxchg8b %2"
+                : "=d" (_nd), "=a" (y),
+                "=m" (*(volatile u64 *)(&page->count_info))
+                : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
+        } 
+        while ( unlikely(_nd != _d) || unlikely(y != x) );
+
+        /*
+         * Unlink from 'd'. At least one reference remains (now anonymous), so
+         * no one else is spinning to try to delete this page from 'd'.
+         */
+        d->tot_pages--;
+        list_del(&page->list);
+        
+        spin_unlock(&d->page_alloc_lock);
+
+        spin_lock(&e->page_alloc_lock);
+
+        /*
+         * Check that 'e' will accept the page and has reservation headroom.
+         * Also, a domain mustn't have PGC_allocated pages when it is dying.
+         */
+        ASSERT(e->tot_pages <= e->max_pages);
+        if ( unlikely(test_bit(DF_DYING, &e->d_flags)) ||
+             unlikely(e->tot_pages == e->max_pages) ||
+             unlikely(!gnttab_prepare_for_transfer(e, d, gntref)) )
+        {
+            MEM_LOG("Transferee has no reservation headroom (%d,%d), or "
+                    "provided a bad grant ref, or is dying (%p).\n",
+                    e->tot_pages, e->max_pages, e->d_flags);
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            okay = 0;
+            break;
+        }
+
+        /* Okay, add the page to 'e'. */
+        if ( unlikely(e->tot_pages++ == 0) )
+            get_knownalive_domain(e);
+        list_add_tail(&page->list, &e->page_list);
+        page_set_owner(page, e);
+
+        spin_unlock(&e->page_alloc_lock);
+
+        /* Transfer is all done: tell the guest about its new page frame. */
+        gnttab_notify_transfer(e, gntref, pfn);
+        
+        put_domain(e);
+        break;
+#endif
+
 /*
  * Local variables:
  * mode: C
index c7192d9f6a1e0ffaf210644a5c2f6e770c91845e..cb3eabb104113dfac539372ac2da708e52a927f0 100644 (file)
@@ -742,6 +742,8 @@ ENTRY(hypercall_table)
         .long SYMBOL_NAME(do_update_va_mapping_otherdomain)
         .long SYMBOL_NAME(do_switch_vm86)
         .long SYMBOL_NAME(do_boot_vcpu)
+        .long SYMBOL_NAME(do_ni_hypercall)       /* 25 */
+        .long SYMBOL_NAME(do_mmuext_op)
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long SYMBOL_NAME(do_ni_hypercall)
         .endr
index e3a522e76af19f418ef34dab5c315f392e4a2967..be6572622ddb72834af7286c5e035e2f794a0226 100644 (file)
@@ -449,6 +449,7 @@ ENTRY(hypercall_table)
         .quad SYMBOL_NAME(do_switch_to_user)
         .quad SYMBOL_NAME(do_boot_vcpu)
         .quad SYMBOL_NAME(do_set_segment_base)   /* 25 */
+        .quad SYMBOL_NAME(do_mmuext_op)
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .quad SYMBOL_NAME(do_ni_hypercall)
         .endr
index 145dc82a3208411b7b017721319ffeb45a22c812..0ec17675c25d98faf977fc7700cf1fd0495af490 100644 (file)
@@ -58,6 +58,7 @@
 #define __HYPERVISOR_switch_to_user       23 /* x86/64 only */
 #define __HYPERVISOR_boot_vcpu            24
 #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
+#define __HYPERVISOR_mmuext_op            26
 
 /*
  * MULTICALLS
  * MMU-UPDATE REQUESTS
  * 
  * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs.
+ * A foreigndom (FD) can be specified (or DOMID_SELF for none).
+ * Where the FD has some effect, it is described below.
  * ptr[1:0] specifies the appropriate MMU_* command.
  * 
- * FOREIGN DOMAIN (FD)
- * -------------------
- *  Some commands recognise an explicitly-declared foreign domain,
- *  in which case they will operate with respect to the foreigner rather than
- *  the calling domain. Where the FD has some effect, it is described below.
- * 
  * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
  * Updates an entry in a page table. If updating an L1 table, and the new
  * table entry is valid/present, the mapped frame must belong to the FD, if
  * ptr[:2]  -- Machine address within the frame whose mapping to modify.
  *             The frame must belong to the FD, if one is specified.
  * val      -- Value to write into the mapping entry.
- *  
- * ptr[1:0] == MMU_EXTENDED_COMMAND:
- * val[7:0] -- MMUEXT_* command.
+ */
+#define MMU_NORMAL_PT_UPDATE     0 /* checked '*ptr = val'. ptr is MA.       */
+#define MMU_MACHPHYS_UPDATE      1 /* ptr = MA of frame to modify entry for  */
+
+/*
+ * MMU EXTENDED OPERATIONS
  * 
- *   val[7:0] == MMUEXT_(UN)PIN_*_TABLE:
- *   ptr[:2]  -- Machine address of frame to be (un)pinned as a p.t. page.
- *               The frame must belong to the FD, if one is specified.
+ * HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
+ * A foreigndom (FD) can be specified (or DOMID_SELF for none).
+ * Where the FD has some effect, it is described below.
  * 
- *   val[7:0] == MMUEXT_NEW_BASEPTR:
- *   ptr[:2]  -- Machine address of new page-table base to install in MMU.
+ * cmd: MMUEXT_(UN)PIN_*_TABLE
+ * mfn: Machine frame number to be (un)pinned as a p.t. page.
+ *      The frame must belong to the FD, if one is specified.
  * 
- *   val[7:0] == MMUEXT_NEW_USER_BASEPTR: [x86/64 only]
- *   ptr[:2]  -- Machine address of new page-table base to install in MMU
- *               when in user space.
+ * cmd: MMUEXT_NEW_BASEPTR
+ * mfn: Machine frame number of new page-table base to install in MMU.
  * 
- *   val[7:0] == MMUEXT_TLB_FLUSH_LOCAL:
- *   No additional arguments. Flushes local TLB.
+ * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only]
+ * mfn: Machine frame number of new page-table base to install in MMU
+ *      when in user space.
  * 
- *   val[7:0] == MMUEXT_INVLPG_LOCAL:
- *   ptr[:2]  -- Linear address to be flushed from the local TLB.
+ * cmd: MMUEXT_TLB_FLUSH_LOCAL
+ * No additional arguments. Flushes local TLB.
  * 
- *   val[7:0] == MMUEXT_FLUSH_CACHE:
- *   No additional arguments. Writes back and flushes cache contents.
+ * cmd: MMUEXT_INVLPG_LOCAL
+ * linear_addr: Linear address to be flushed from the local TLB.
  * 
- *   val[7:0] == MMUEXT_SET_LDT:
- *   ptr[:2]  -- Linear address of LDT base (NB. must be page-aligned).
- *   val[:8]  -- Number of entries in LDT.
+ * cmd: MMUEXT_TLB_FLUSH_MULTI
+ * cpuset: Set of VCPUs to be flushed.
  * 
- *   val[7:0] == MMUEXT_TRANSFER_PAGE:
- *   val[31:16] -- Domain to whom page is to be transferred.
- *   (val[15:8],ptr[9:2]) -- 16-bit reference into transferee's grant table.
- *   ptr[:12]  -- Page frame to be reassigned to the FD.
- *                (NB. The frame must currently belong to the calling domain).
+ * cmd: MMUEXT_INVLPG_MULTI
+ * linear_addr: Linear address to be flushed.
+ * cpuset: Set of VCPUs to be flushed.
  * 
- *   val[7:0] == MMUEXT_SET_FOREIGNDOM:
- *   val[31:16] -- Domain to set as the Foreign Domain (FD).
- *                 (NB. DOMID_SELF is not recognised)
- *                 If FD != DOMID_IO then the caller must be privileged.
+ * cmd: MMUEXT_TLB_FLUSH_ALL
+ * No additional arguments. Flushes all VCPUs' TLBs.
  * 
- *   val[7:0] == MMUEXT_CLEAR_FOREIGNDOM:
- *   Clears the FD.
+ * cmd: MMUEXT_INVLPG_ALL
+ * linear_addr: Linear address to be flushed from all VCPUs' TLBs.
  * 
- *   val[7:0] == MMUEXT_REASSIGN_PAGE:
- *   ptr[:2]  -- A machine address within the page to be reassigned to the FD.
- *               (NB. page must currently belong to the calling domain).
+ * cmd: MMUEXT_FLUSH_CACHE
+ * No additional arguments. Writes back and flushes cache contents.
  * 
- *   val[7:0] == MMUEXT_TLB_FLUSH_MULTI:
- *   Flush TLBs of VCPUs specified in @mask.
+ * cmd: MMUEXT_SET_LDT
+ * linear_addr: Linear address of LDT base (NB. must be page-aligned).
+ * nr_ents: Number of entries in LDT.
  * 
- *   val[7:0] == MMUEXT_INVLPG_MULTI:
- *   ptr[:2]  -- Linear address to be flushed from TLB of VCPUs in @mask.
+ * cmd: MMUEXT_REASSIGN_PAGE
+ * mfn: Machine frame number to be reassigned to the FD.
+ *      (NB. page must currently belong to the calling domain).
  */
-#define MMU_NORMAL_PT_UPDATE     0 /* checked '*ptr = val'. ptr is MA.       */
-#define MMU_MACHPHYS_UPDATE      2 /* ptr = MA of frame to modify entry for  */
-#define MMU_EXTENDED_COMMAND     3 /* least 8 bits of val demux further      */
 #define MMUEXT_PIN_L1_TABLE      0 /* ptr = MA of frame to pin               */
 #define MMUEXT_PIN_L2_TABLE      1 /* ptr = MA of frame to pin               */
 #define MMUEXT_PIN_L3_TABLE      2 /* ptr = MA of frame to pin               */
 #define MMUEXT_NEW_BASEPTR       5 /* ptr = MA of new pagetable base         */
 #define MMUEXT_TLB_FLUSH_LOCAL   6 /* ptr = NULL                             */
 #define MMUEXT_INVLPG_LOCAL      7 /* ptr = VA to invalidate                 */
-#define MMUEXT_FLUSH_CACHE       8
-#define MMUEXT_SET_LDT           9 /* ptr = VA of table; val = # entries     */
-#define MMUEXT_SET_FOREIGNDOM   10 /* val[31:16] = dom                       */
-#define MMUEXT_CLEAR_FOREIGNDOM 11
-#define MMUEXT_TRANSFER_PAGE    12 /* ptr = MA of frame; val[31:16] = dom    */
-#define MMUEXT_REASSIGN_PAGE    13
-#define MMUEXT_NEW_USER_BASEPTR 14
-#define MMUEXT_TLB_FLUSH_MULTI  15 /* ptr = NULL; mask = VCPUs to flush      */
-#define MMUEXT_INVLPG_MULTI     16 /* ptr = VA to inval.; mask = VCPUs       */
-#define MMUEXT_CMD_MASK        255
-#define MMUEXT_CMD_SHIFT         8
+#define MMUEXT_TLB_FLUSH_MULTI   8 /* ptr = NULL; mask = VCPUs to flush      */
+#define MMUEXT_INVLPG_MULTI      9 /* ptr = VA to inval.; mask = VCPUs       */
+#define MMUEXT_TLB_FLUSH_ALL    10
+#define MMUEXT_INVLPG_ALL       11
+#define MMUEXT_FLUSH_CACHE      12
+#define MMUEXT_SET_LDT          13 /* ptr = VA of table; val = # entries     */
+#define MMUEXT_REASSIGN_PAGE    14
+#define MMUEXT_NEW_USER_BASEPTR 15
 
-/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
-#define UVMF_FLUSH_TLB          1 /* Flush entire TLB. */
-#define UVMF_INVLPG             2 /* Flush the VA mapping being updated. */
+#ifndef __ASSEMBLY__
+struct mmuext_op {
+    unsigned int cmd;
+    union {
+        /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR, REASSIGN_PAGE */
+        memory_t mfn;
+        /* INVLPG_LOCAL, INVLPG_MULTI, INVLPG_ALL, SET_LDT */
+        memory_t linear_addr;
+    };
+    union {
+        /* SET_LDT */
+        unsigned int nr_ents;
+        /* TLB_FLUSH_MULTI, INVLPG_MULTI */
+        unsigned long cpuset;
+    };
+};
+#endif
 
-/* Backwards source compatibility. */
-#define MMUEXT_TLB_FLUSH        MMUEXT_TLB_FLUSH_LOCAL
-#define MMUEXT_INVLPG           MMUEXT_INVLPG_LOCAL
+/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
+#define UVMF_TLB_FLUSH_LOCAL    1 /* Flush local CPU's TLB.          */
+#define UVMF_INVLPG_LOCAL       2 /* Flush VA from local CPU's TLB.  */
+#define UVMF_TLB_FLUSH_ALL      3 /* Flush all TLBs.                 */
+#define UVMF_INVLPG_ALL         4 /* Flush VA from all TLBs.         */
+#define UVMF_FLUSH_MASK         7
 
 /*
  * Commands to HYPERVISOR_sched_op().
@@ -270,7 +278,6 @@ typedef struct
 {
     memory_t ptr;       /* Machine address of PTE. */
     memory_t val;       /* New contents of PTE.    */
-    /*unsigned long mask;*/ /* VCPU mask (certain extended commands). */
 } PACKED mmu_update_t;
 
 /*